# Replication archive for:
# Galos and Coppock. 2023. 
# Gender Composition Predicts Gender Bias: A Meta-reanalysis of Hiring Discrimination Audit Experiments. 
# Science Advances. Forthcoming.

library(tidyverse)
library(metafor)
library(broom)
library(scales)
source("code/helpers.R")

occupation_cates <- read_rds("data/occupation_cates.rds")

occupation_cates |> 
  group_by(study_id) |> 
  summarize(n = n()) |> 
  summarize(mean(n), median(n))

meta_fit_1 <-
  rma.uni(
    yi = estimate,
    sei = std.error,
    mods = ~ fraction_female,
    data = occupation_cates
  )

meta_fit_1_df <- tidy(meta_fit_1)
occupation_cates$weight <- weights(meta_fit_1)

preds_fit_1 <-
  meta_fit_1 %>% 
  tidy_predictions()

g <- 
  ggplot(occupation_cates) +
  aes(fraction_female, 
      estimate, 
      color = occupation_gender_distribution,
      shape = occupation_gender_distribution) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.35) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), alpha = 0.25) +
  geom_point(alpha = 0.5, aes(size = weight), stroke = 0) +
  geom_ribbon(data = preds_fit_1, aes(ymin = conf.low, ymax = conf.high, color = NULL, shape = NULL), alpha = 0.55) +
  geom_line(data = preds_fit_1, alpha = 0.55, aes(color = NULL, shape = NULL)) +
  coord_cartesian(xlim = c(0, 1), ylim = c(-25, 25)) +
  scale_size_continuous(range = c(1, 4)) +
  scale_y_continuous(breaks = seq(-10, 10, 5)) +
  scale_x_continuous(label = percent_format()) +
  scale_color_brewer(palette = "Set2") +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        legend.position = "none") +
  labs(x = "Status quo gender composition (share of women in occupation)",
       y = "Conditional average treatment effect")

g

# magnitude comparisons for RR ------------------------------

preds_list <-
  predict(meta_fit_1, newmods = c(`fraction_female` = c(0, 1)))
abs_preds <- abs(preds_list$pred)
difference_in_magnitude <- abs_preds[2] - abs_preds[1]

set.seed(343)
# sims <- 500
sims <- 5
difference_in_magnitude_bootstrap <- rep(NA, sims)
# this loop is slow, so we set it to 5 simulations for testing. Increase to 500 to recover estimates in the paper
for(i in 1:sims){

  occupation_cates_bootstrap <- 
    sample_n(ungroup(occupation_cates), 
             size = nrow(occupation_cates), 
             replace = TRUE)
  
  meta_fit_bootstrap <-
    rma.uni(
      yi = estimate,
      sei = std.error,
      mods = ~ fraction_female,
      data = occupation_cates_bootstrap
    )
  
  preds_list <-
    predict(meta_fit_bootstrap, newmods = c(`fraction_female` = c(0, 1)))
  
  abs_preds <- abs(preds_list$pred)
  difference_in_magnitude_bootstrap[i] <- abs_preds[2] - abs_preds[1]
  print(i)
}
print(difference_in_magnitude)
print(sd(difference_in_magnitude_bootstrap))
